package com.heima.wemedia;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;

import java.io.IOException;
import java.net.URLDecoder;

/**
 * Crawler demo: loads the NetEase (163.com) mobile entertainment channel in a
 * real Chrome browser, parses the rendered HTML with jsoup and prints, for each
 * {@code <article>} element, its title, detail link and cover image addresses.
 *
 * @author mrchen
 * @date 2022/1/6 15:10
 */
public class ReptilesDemo {
    /** System property Selenium reads to locate the chromedriver binary. */
    private static final String CHROME_DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Fallback chromedriver location, used only when the property is not already set. */
    private static final String DEFAULT_CHROME_DRIVER_PATH = "C:\\worksoft\\tools\\chromedriver.exe";

    /** Page that is crawled by this demo. */
    private static final String ENT_CHANNEL_URL =
            "https://3g.163.com/touch/ent/?ver=c&clickfrom=index2018_header_main";

    /** Parsed snapshot of the crawled page; populated by {@link #init()} before each test. */
    Document document;

    /**
     * Opens the channel page in Chrome and stores the parsed page source in
     * {@link #document}.
     *
     * <p>The browser is only needed to obtain the page source, so it is shut
     * down again before this method returns; otherwise every test run would
     * leave a Chrome/chromedriver process behind.
     *
     * <p>NOTE(review): an earlier version fetched the page with a plain
     * {@code Jsoup.connect(...).get()} and a desktop User-Agent header; a real
     * browser is presumably used because the article list is rendered by
     * JavaScript — TODO confirm.
     */
    @BeforeEach
    public void init(){
        // Respect a path supplied via -Dwebdriver.chrome.driver=...; fall back to the
        // historical hard-coded location only when nothing was configured.
        if (System.getProperty(CHROME_DRIVER_PROPERTY) == null) {
            System.setProperty(CHROME_DRIVER_PROPERTY, DEFAULT_CHROME_DRIVER_PATH);
        }
        WebDriver driver = new ChromeDriver();
        try {
            driver.get(ENT_CHANNEL_URL);
            document = Jsoup.parse(driver.getPageSource());
        } finally {
            // Always release the browser, even when loading or parsing fails.
            driver.quit();
        }
    }

    /**
     * Prints title, detail URL and cover image addresses of every article found
     * in {@link #document}.
     *
     * <p>Articles lacking a title or a link are skipped; an article without a
     * {@code news-pic} container simply has no cover images printed. An image
     * whose {@code data-src} cannot be parsed is reported on {@code System.err}
     * and does not prevent the remaining images from being printed.
     *
     * @throws IOException if the cover image address cannot be URL-decoded
     *                     because the requested charset is unsupported
     */
    @Test
    public void getData() throws IOException {
        Elements articleList = document.getElementsByTag("article");
        for (Element articleEle : articleList) {
            // first() returns null for an empty selection, so no exception is
            // needed to detect <article> elements that are not news entries.
            Element titleEle = articleEle.getElementsByClass("title").first();
            if (titleEle == null) {
                continue;
            }
            System.out.println("文章标题:  " + titleEle.text());

            Element aEle = articleEle.getElementsByTag("a").first();
            if (aEle == null) {
                continue;
            }
            System.out.println("文章详情url: " + aEle.attr("href"));

            Element newsPicDiv = articleEle.getElementsByClass("news-pic").first();
            if (newsPicDiv == null) {
                continue;
            }
            for (Element img : newsPicDiv.getElementsByTag("img")) {
                String dataSrc = img.attr("data-src");
                String coverUrl = extractCoverUrl(dataSrc);
                if (coverUrl == null) {
                    System.err.println("Skipping image with unparseable data-src: " + dataSrc);
                    continue;
                }
                System.out.println("封面图片地址: " + coverUrl);
            }
        }
    }

    /**
     * Extracts the real image address from a {@code data-src} value in which it
     * is carried, URL-encoded, as the value of the first query parameter
     * (i.e. {@code <proxy>?<name>=<encoded-url>[&...]}).
     *
     * @param dataSrc raw {@code data-src} attribute value; may be empty
     * @return the decoded image address, or {@code null} when {@code dataSrc}
     *         has no query string, no {@code =} in it, or an invalid
     *         percent-escape
     * @throws IOException if the charset name passed to {@link URLDecoder} is
     *                     not supported by the runtime
     */
    private static String extractCoverUrl(String dataSrc) throws IOException {
        int queryStart = dataSrc.indexOf('?');
        if (queryStart < 0) {
            return null;
        }
        String query = dataSrc.substring(queryStart + 1);

        int valueStart = query.indexOf('=');
        if (valueStart < 0) {
            return null;
        }
        valueStart++;

        // The value runs to the next '&', or to the end of the query when it is
        // the last (or only) parameter.
        int valueEnd = query.indexOf('&', valueStart);
        if (valueEnd < 0) {
            valueEnd = query.length();
        }

        try {
            return URLDecoder.decode(query.substring(valueStart, valueEnd), "UTF8");
        } catch (IllegalArgumentException malformedEscape) {
            // URLDecoder rejects broken %-sequences; treat the address as unparseable.
            return null;
        }
    }
}